import os
import xai
import logging as log
import warnings
import matplotlib.pyplot as plt
import sys
from util.commons import *
from util.ui import *
from util.model import *
from util.split import *
from util.dataset import *
from IPython.display import display, HTML
For this example we are going to use the 'Adult Census' dataset, which consists of both categorical and numerical features.
dataset, msg = get_dataset('census')
display(msg)
display(dataset.df)
"Dataset 'census (Adult census dataset)' loaded successfully. For further information about this dataset please visit: https://ethicalml.github.io/xai/index.html?highlight=load_census#xai.data.load_census"
| | age | workclass | education | education-num | marital-status | occupation | relationship | ethnicity | gender | capital-gain | capital-loss | hours-per-week | loan |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 39 | State-gov | Bachelors | 13 | Never-married | Adm-clerical | Not-in-family | White | Male | 2174 | 0 | 40 | <=50K |
| 1 | 50 | Self-emp-not-inc | Bachelors | 13 | Married-civ-spouse | Exec-managerial | Husband | White | Male | 0 | 0 | 13 | <=50K |
| 2 | 38 | Private | HS-grad | 9 | Divorced | Handlers-cleaners | Not-in-family | White | Male | 0 | 0 | 40 | <=50K |
| 3 | 53 | Private | 11th | 7 | Married-civ-spouse | Handlers-cleaners | Husband | Black | Male | 0 | 0 | 40 | <=50K |
| 4 | 28 | Private | Bachelors | 13 | Married-civ-spouse | Prof-specialty | Wife | Black | Female | 0 | 0 | 40 | <=50K |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 32556 | 27 | Private | Assoc-acdm | 12 | Married-civ-spouse | Tech-support | Wife | White | Female | 0 | 0 | 38 | <=50K |
| 32557 | 40 | Private | HS-grad | 9 | Married-civ-spouse | Machine-op-inspct | Husband | White | Male | 0 | 0 | 40 | >50K |
| 32558 | 58 | Private | HS-grad | 9 | Widowed | Adm-clerical | Unmarried | White | Female | 0 | 0 | 40 | <=50K |
| 32559 | 22 | Private | HS-grad | 9 | Never-married | Adm-clerical | Own-child | White | Male | 0 | 0 | 20 | <=50K |
| 32560 | 52 | Self-emp-inc | HS-grad | 9 | Married-civ-spouse | Exec-managerial | Wife | White | Female | 15024 | 0 | 40 | >50K |
32561 rows × 13 columns
There are values in the dataset that are unknown, marked with '?'. In this step, all rows containing such values are removed.
dataset.df['workclass'].unique()
array([' State-gov', ' Self-emp-not-inc', ' Private', ' Federal-gov',
' Local-gov', ' ?', ' Self-emp-inc', ' Without-pay',
' Never-worked'], dtype=object)
dataset.df.loc[dataset.df['workclass'] == ' ?']
| | age | workclass | education | education-num | marital-status | occupation | relationship | ethnicity | gender | capital-gain | capital-loss | hours-per-week | loan |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 27 | 54 | ? | Some-college | 10 | Married-civ-spouse | ? | Husband | Asian-Pac-Islander | Male | 0 | 0 | 60 | >50K |
| 61 | 32 | ? | 7th-8th | 4 | Married-spouse-absent | ? | Not-in-family | White | Male | 0 | 0 | 40 | <=50K |
| 69 | 25 | ? | Some-college | 10 | Never-married | ? | Own-child | White | Male | 0 | 0 | 40 | <=50K |
| 77 | 67 | ? | 10th | 6 | Married-civ-spouse | ? | Husband | White | Male | 0 | 0 | 2 | <=50K |
| 106 | 17 | ? | 10th | 6 | Never-married | ? | Own-child | White | Female | 34095 | 0 | 32 | <=50K |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 32530 | 35 | ? | Bachelors | 13 | Married-civ-spouse | ? | Wife | White | Female | 0 | 0 | 55 | >50K |
| 32531 | 30 | ? | Bachelors | 13 | Never-married | ? | Not-in-family | Asian-Pac-Islander | Female | 0 | 0 | 99 | <=50K |
| 32539 | 71 | ? | Doctorate | 16 | Married-civ-spouse | ? | Husband | White | Male | 0 | 0 | 10 | >50K |
| 32541 | 41 | ? | HS-grad | 9 | Separated | ? | Not-in-family | Black | Female | 0 | 0 | 32 | <=50K |
| 32542 | 72 | ? | HS-grad | 9 | Married-civ-spouse | ? | Husband | White | Male | 0 | 0 | 25 | <=50K |
1836 rows × 13 columns
dataset.df = remove_undefined_rows(' ?', dataset.df)
dataset.df['workclass'].unique()
array([' State-gov', ' Self-emp-not-inc', ' Private', ' Federal-gov',
' Local-gov', ' Self-emp-inc', ' Without-pay'], dtype=object)
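For reference, remove_undefined_rows is a helper from util.commons. A plain-pandas equivalent might look like the following sketch (assuming the sentinel is the string ' ?', with the leading space these raw values carry; df_clean is a hypothetical name):
import numpy as np
# Replace the ' ?' sentinel with NaN, then drop every row that contains one.
df_clean = dataset.df.replace(' ?', np.nan).dropna().reset_index(drop=True)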
Three visualization functions offered by the XAI module will be used to analyze the dataset.
%matplotlib inline
plt.style.use('ggplot')
warnings.filterwarnings('ignore')
imbalanced_cols = ['gender', 'ethnicity']
xai.imbalance_plot(dataset.df, *imbalanced_cols)
_ = xai.correlations(dataset.df, include_categorical=True, plot_type="matrix", plt_kwargs={'figsize': (6, 6)})
_ = xai.correlations(dataset.df, include_categorical=True, plt_kwargs={'figsize': (8, 6)})
21-Oct-21 14:56:28 - No categorical_cols passed so inferred using np.object, np.int8 and np.bool: Index(['workclass', 'education', 'marital-status', 'occupation',
'relationship', 'ethnicity', 'gender', 'loan'],
dtype='object'). If you see an error these are not correct, please provide them as a string array as: categorical_cols=['col1', 'col2', ...]
In the cell below the target variable is selected. In this example we use the column loan as the target variable; it indicates whether a person earns more than 50K per year (>50K | <=50K).
df_X, df_y, msg = split_feature_target(dataset.df, "loan")
df_y
21-Oct-21 14:56:32 - Target 'loan' selected successfully.
0 <=50K
1 <=50K
2 <=50K
3 <=50K
4 <=50K
...
30713 <=50K
30714 >50K
30715 <=50K
30716 <=50K
30717 >50K
Name: loan, Length: 30718, dtype: object
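split_feature_target is a thin wrapper from util.split; in plain pandas the equivalent split would be two lines:
df_y = dataset.df["loan"]                  # target column
df_X = dataset.df.drop(columns=["loan"])   # remaining features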
Four models are trained on this dataset. The output below shows the accuracy, classification report, confusion matrix, and ROC curve for each model.
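The model cells below go through the notebook's util.model wrappers (fill_empty_models, fill_model). As a rough sketch of what one such training step amounts to in plain scikit-learn (an illustration under that assumption, not the wrappers' actual code):
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
# One-hot encode the categorical columns, then use a standard 70/30 split.
X_enc = pd.get_dummies(df_X)
X_tr, X_te, y_tr, y_te = train_test_split(X_enc, df_y, test_size=0.3, random_state=0)
clf = LogisticRegression(max_iter=1000).fit(X_tr, y_tr)
print(classification_report(y_te, clf.predict(X_te)))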
# Create empty models
initial_models, msg = fill_empty_models(df_X, df_y, 4)
models = []
model1 = initial_models[0]
msg = fill_model(model1, Algorithm.LOGISTIC_REGRESSION, Split(SplitTypes.NORMAL))
models.append(model1)
model_1 = models[0]
21-Oct-21 14:56:34 - Model accuracy: 0.8065321180555556
21-Oct-21 14:56:36 - Classification report:
| | precision | recall | f1-score | support |
|---|---|---|---|---|
| <=50K | 0.94 | 0.80 | 0.86 | 6921 |
| >50K | 0.58 | 0.84 | 0.68 | 2295 |
| accuracy | | | 0.81 | 9216 |
| macro avg | 0.76 | 0.82 | 0.77 | 9216 |
| weighted avg | 0.85 | 0.81 | 0.82 | 9216 |
21-Oct-21 14:56:37 - Model Model 1 trained successfully!
model2 = initial_models[1]
msg = fill_model(model2, Algorithm.DECISION_TREE, Split(SplitTypes.NORMAL))
models.append(model2)
model_2 = models[1]
21-Oct-21 14:56:40 - Model accuracy: 0.8110894097222222
21-Oct-21 14:56:42 - Classification report:
| | precision | recall | f1-score | support |
|---|---|---|---|---|
| <=50K | 0.89 | 0.86 | 0.87 | 6921 |
| >50K | 0.61 | 0.67 | 0.64 | 2295 |
| accuracy | | | 0.81 | 9216 |
| macro avg | 0.75 | 0.76 | 0.75 | 9216 |
| weighted avg | 0.82 | 0.81 | 0.81 | 9216 |
21-Oct-21 14:56:43 - Model Model 2 trained successfully!
model3 = initial_models[2]
msg = fill_model(model3, Algorithm.RANDOM_FOREST, Split(SplitTypes.NORMAL))
models.append(model3)
model_3 = models[2]
21-Oct-21 14:56:57 - Model accuracy: 0.83984375
21-Oct-21 14:56:58 - Classification report:
| | precision | recall | f1-score | support |
|---|---|---|---|---|
| <=50K | 0.89 | 0.90 | 0.89 | 6921 |
| >50K | 0.69 | 0.65 | 0.67 | 2295 |
| accuracy | | | 0.84 | 9216 |
| macro avg | 0.79 | 0.78 | 0.78 | 9216 |
| weighted avg | 0.84 | 0.84 | 0.84 | 9216 |
21-Oct-21 14:57:01 - Model Model 3 trained successfully!
model4 = initial_models[3]
msg = fill_model(model4, Algorithm.SVC, Split(SplitTypes.NORMAL))
models.append(model4)
model_4 = models[3]
21-Oct-21 15:06:00 - Model accuracy: 0.7981770833333334
21-Oct-21 15:06:02 - Classification report:
| | precision | recall | f1-score | support |
|---|---|---|---|---|
| <=50K | 0.81 | 0.96 | 0.88 | 6921 |
| >50K | 0.72 | 0.31 | 0.44 | 2295 |
| accuracy | | | 0.80 | 9216 |
| macro avg | 0.76 | 0.64 | 0.66 | 9216 |
| weighted avg | 0.79 | 0.80 | 0.77 | 9216 |
21-Oct-21 15:07:47 - Model Model 4 trained successfully!
In the following steps we will use global interpretation techniques, which help answer questions such as: How does the model behave in general? Which features drive its predictions, and which are irrelevant? This information can be very important for understanding the model better. Most of these techniques work by investigating the conditional interactions between the target variable and the features over the complete dataset.
The importance of a feature is the increase in the model's prediction error after the feature's values are permuted, which breaks the relationship between the feature and the true outcome. A feature is "important" if permuting it increases the model error, because that means the model relied on the feature for making correct predictions. Conversely, a feature is "unimportant" if permuting its values leaves the error largely or entirely unchanged.
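scikit-learn ships this technique directly; below is a minimal sketch, assuming a fitted estimator clf and a held-out split X_te, y_te (hypothetical names; this notebook's models live inside the util.model wrappers):
from sklearn.inspection import permutation_importance
# Shuffle each feature column n_repeats times and measure the drop in score.
result = permutation_importance(clf, X_te, y_te, n_repeats=10, random_state=0)
for name, mean, std in zip(X_te.columns, result.importances_mean, result.importances_std):
    if mean - 2 * std > 0:  # report only features clearly above zero importance
        print(f"{name}: {mean:.3f} +/- {std:.3f}")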
In the first case we use ELI5, which does not permute the features but simply visualizes each feature's weight.
plot = generate_feature_importance_plot(FeatureImportanceType.ELI5, model_1)
display(plot)
21-Oct-21 15:07:47 - Generating a feature importance plot using ELI5 for Model 1 ...
y= >50K top features
| Weight | Feature |
|---|---|
| +0.989 | relationship_ Wife |
| +0.793 | marital-status_ Married-civ-spouse |
| +0.681 | occupation_ Exec-managerial |
| +0.488 | education_ Prof-school |
| … 14 more positive … | |
| … 29 more negative … | |
| -0.510 | education_ 11th |
| -0.514 | ethnicity_ White |
| -0.531 | occupation_ Machine-op-inspct |
| -0.538 | education_ 7th-8th |
| -0.553 | occupation_ Handlers-cleaners |
| -0.566 | marital-status_ Divorced |
| -0.599 | gender_ Male |
| -0.619 | ethnicity_ Black |
| -0.672 | workclass_ Self-emp-not-inc |
| -0.741 | relationship_ Unmarried |
| -0.812 | occupation_ Farming-fishing |
| -1.037 | occupation_ Other-service |
| -1.224 | relationship_ Own-child |
| -1.229 | marital-status_ Never-married |
| -1.241 | gender_ Female |
| -1.840 | <BIAS> |
plot = generate_feature_importance_plot(FeatureImportanceType.ELI5, model_2)
display(plot)
21-Oct-21 15:07:47 - Generating a feature importance plot using ELI5 for Model 2 ...
| Weight | Feature |
|---|---|
| 0.2905 | marital-status_ Married-civ-spouse |
| 0.1640 | age |
| 0.1024 | capital-gain |
| 0.1002 | education-num |
| 0.0935 | hours-per-week |
| 0.0283 | capital-loss |
| 0.0113 | occupation_ Exec-managerial |
| 0.0111 | workclass_ Private |
| 0.0099 | workclass_ Self-emp-not-inc |
| 0.0096 | occupation_ Prof-specialty |
| 0.0090 | ethnicity_ White |
| 0.0090 | occupation_ Sales |
| 0.0086 | occupation_ Craft-repair |
| 0.0083 | workclass_ Local-gov |
| 0.0076 | workclass_ Self-emp-inc |
| 0.0075 | occupation_ Adm-clerical |
| 0.0073 | occupation_ Other-service |
| 0.0072 | gender_ Female |
| 0.0069 | workclass_ State-gov |
| 0.0066 | relationship_ Wife |
| … 42 more … | |
plot = generate_feature_importance_plot(FeatureImportanceType.ELI5, model_3)
display(plot)
21-Oct-21 15:07:50 - Generating a feature importance plot using ELI5 for Model 3 ...
| Weight | Feature |
|---|---|
| 0.2217 ± 0.0638 | age |
| 0.1071 ± 0.0252 | hours-per-week |
| 0.0767 ± 0.1807 | marital-status_ Married-civ-spouse |
| 0.0759 ± 0.0327 | capital-gain |
| 0.0698 ± 0.1635 | relationship_ Husband |
| 0.0669 ± 0.0616 | education-num |
| 0.0444 ± 0.1121 | marital-status_ Never-married |
| 0.0243 ± 0.0089 | capital-loss |
| 0.0173 ± 0.0175 | occupation_ Exec-managerial |
| 0.0160 ± 0.0367 | relationship_ Wife |
| 0.0152 ± 0.0353 | relationship_ Not-in-family |
| 0.0149 ± 0.0145 | occupation_ Prof-specialty |
| 0.0147 ± 0.0529 | relationship_ Own-child |
| 0.0147 ± 0.0431 | gender_ Male |
| 0.0124 ± 0.0200 | occupation_ Other-service |
| 0.0124 ± 0.0046 | workclass_ Private |
| 0.0119 ± 0.0373 | gender_ Female |
| 0.0116 ± 0.0172 | education_ Bachelors |
| 0.0092 ± 0.0036 | workclass_ Self-emp-not-inc |
| 0.0086 ± 0.0103 | education_ HS-grad |
| … 42 more … | |
plot = generate_feature_importance_plot(FeatureImportanceType.ELI5, model_4)
display(plot)
21-Oct-21 15:07:50 - Generating a feature importance plot using ELI5 for Model 4 ... 21-Oct-21 15:07:50 - SVC is not supported by FeatureImportanceType.ELI5.
None
print(generate_feature_importance_explanation(FeatureImportanceType.ELI5, models, 4))
21-Oct-21 15:07:50 - Generating feature importance explanation for ELI5 ... 21-Oct-21 15:07:52 - SVC not supported for ELI5 explanations.
Summary: The most important feature for Model 1 is relationship_ Wife with weight ~0.989. The 2nd highest feature for Model 1 is marital-status_ Married-civ-spouse with weight ~0.793. The 3rd most important feature for Model 1 is occupation_ Exec-managerial with weight ~0.681. The 4th most important feature for Model 1 is education_ Prof-school with weight ~0.488. The most important feature for Model 2 is marital-status_ Married-civ-spouse with weight ~0.291, same as 2nd for Model 1. The 2nd most important feature for Model 2 is age with weight ~0.164. The 3rd most valuable feature for Model 2 is capital-gain with weight ~0.102. The 4th best feature for Model 2 is education-num with weight ~0.1. The highest feature for Model 3 is age with weight ~0.222, similar to 2nd for Model 2. The 2nd most influential feature for Model 3 is hours-per-week with weight ~0.107. The 3rd most influential feature for Model 3 is marital-status_ Married-civ-spouse with weight ~0.077, similar to 2nd for Model 1. The 4th best feature for Model 3 is capital-gain with weight ~0.076, similar to 3rd for Model 2.
%matplotlib inline
plt.rcParams['figure.figsize'] = [14, 15]
plt.style.use('ggplot')
warnings.filterwarnings('ignore')
_ = generate_feature_importance_plot(FeatureImportanceType.SKATER, model_1)
21-Oct-21 15:07:52 - Generating a feature importance plot using SKATER for Model 1 ... 21-Oct-21 15:07:53 - Initializing Skater - generating new in-memory model. This operation may be time-consuming so please be patient. 2021-10-21 15:08:09,993 - skater.core.explanations - WARNING - Progress bars slow down runs by 10-20%. For slightly faster runs, do progress_bar=False
[62/62] features ████████████████████ Time elapsed: 19 seconds
_ = generate_feature_importance_plot(FeatureImportanceType.SKATER, model_2)
21-Oct-21 15:08:31 - Generating a feature importance plot using SKATER for Model 2 ... 21-Oct-21 15:08:31 - Initializing Skater - generating new in-memory model. This operation may be time-consuming so please be patient. 2021-10-21 15:08:47,707 - skater.core.explanations - WARNING - Progress bars slow down runs by 10-20%. For slightly faster runs, do progress_bar=False
[62/62] features ████████████████████ Time elapsed: 17 seconds
_ = generate_feature_importance_plot(FeatureImportanceType.SKATER, model_3)
21-Oct-21 15:09:07 - Generating a feature importance plot using SKATER for Model 3 ... 21-Oct-21 15:09:07 - Initializing Skater - generating new in-memory model. This operation may be time-consuming so please be patient. 2021-10-21 15:09:23,963 - skater.core.explanations - WARNING - Progress bars slow down runs by 10-20%. For slightly faster runs, do progress_bar=False
[62/62] features ████████████████████ Time elapsed: 33 seconds
_ = generate_feature_importance_plot(FeatureImportanceType.SKATER, model_4)
21-Oct-21 15:09:59 - Generating a feature importance plot using SKATER for Model 4 ... 21-Oct-21 15:09:59 - Initializing Skater - generating new in-memory model. This operation may be time-consuming so please be patient. 2021-10-21 15:11:29,149 - skater.core.explanations - WARNING - Progress bars slow down runs by 10-20%. For slightly faster runs, do progress_bar=False
[62/62] features ████████████████████ Time elapsed: 81 seconds
print('\n' + generate_feature_importance_explanation(FeatureImportanceType.SKATER, models, 4))
21-Oct-21 15:12:52 - Generating feature importance explanation for SKATER ... 2021-10-21 15:14:13,974 - skater.core.explanations - WARNING - Progress bars slow down runs by 10-20%. For slightly faster runs, do progress_bar=False
[62/62] features ████████████████████ Time elapsed: 39 seconds
2021-10-21 15:16:15,760 - skater.core.explanations - WARNING - Progress bars slow down runs by 10-20%. For slightly faster runs, do progress_bar=False
[62/62] features ████████████████████ Time elapsed: 37 seconds
2021-10-21 15:18:14,883 - skater.core.explanations - WARNING - Progress bars slow down runs by 10-20%. For slightly faster runs, do progress_bar=False
[62/62] features ████████████████████ Time elapsed: 49 seconds
2021-10-21 15:20:45,289 - skater.core.explanations - WARNING - Progress bars slow down runs by 10-20%. For slightly faster runs, do progress_bar=False
[62/62] features ████████████████████ Time elapsed: 360 seconds
Summary: The most important feature for Model 1 is gender_ Female with weight ~0.091. The 2nd highest feature for Model 1 is marital-status_ Never-married with weight ~0.085. The 3rd most influential feature for Model 1 is marital-status_ Married-civ-spouse with weight ~0.076. The 4th most valuable feature for Model 1 is education-num with weight ~0.061. The highest feature for Model 2 is age with weight ~0.16. The 2nd best feature for Model 2 is marital-status_ Married-civ-spouse with weight ~0.142, identical to 3rd for Model 1. The 3rd best feature for Model 2 is education-num with weight ~0.138, similar to 4th for Model 1. The 4th most influential feature for Model 2 is hours-per-week with weight ~0.11. The highest feature for Model 3 is age with weight ~0.133, similar to 1st for Model 2. The 2nd highest feature for Model 3 is hours-per-week with weight ~0.088, similar to 4th for Model 2. The 3rd most valuable feature for Model 3 is education-num with weight ~0.076, same as 4th for Model 1. The 4th most valuable feature for Model 3 is relationship_ Husband with weight ~0.069. The most valuable feature for Model 4 is capital-gain with weight ~0.61. The 2nd most valuable feature for Model 4 is capital-loss with weight ~0.354. The 3rd best feature for Model 4 is age with weight ~0.018, similar to 1st for Model 2. The 4th most influential feature for Model 4 is hours-per-week with weight ~0.015, similar to 4th for Model 2.
In the cell below we use SHAP (SHapley Additive exPlanations). It combines feature contributions with game theory to compute SHAP values, then derives global feature importance by averaging the magnitudes of the SHAP values across the dataset.
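A minimal sketch of that computation with the shap library, assuming a fitted tree-based model rf_model and the encoded feature frame X (both hypothetical names):
import numpy as np
import shap
explainer = shap.TreeExplainer(rf_model)
shap_values = explainer.shap_values(X)  # classifiers may return one array per class
sv = shap_values[1] if isinstance(shap_values, list) else shap_values
# Global importance = mean absolute SHAP value per feature across the dataset.
importance = np.abs(sv).mean(axis=0)
for name, imp in sorted(zip(X.columns, importance), key=lambda t: -t[1])[:5]:
    print(f"{name}: {imp:.3f}")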
from shap import initjs
initjs()
%matplotlib inline
plt.style.use('ggplot')
warnings.filterwarnings('ignore')
generate_feature_importance_plot(FeatureImportanceType.SHAP, model_1)
21-Oct-21 15:26:45 - Generating a feature importance plot using SHAP for Model 1 ... 21-Oct-21 15:26:45 - Initializing Shap - calculating shap values. This operation is time-consuming so please be patient.
generate_feature_importance_plot(FeatureImportanceType.SHAP, model_2)
21-Oct-21 15:43:13 - Generating a feature importance plot using SHAP for Model 2 ... 21-Oct-21 15:43:13 - Initializing Shap - calculating shap values. This operation is time-consuming so please be patient.
generate_feature_importance_plot(FeatureImportanceType.SHAP, model_3)
21-Oct-21 15:59:25 - Generating a feature importance plot using SHAP for Model 3 ... 21-Oct-21 15:59:25 - Initializing Shap - calculating shap values. This operation is time-consuming so please be patient.
generate_feature_importance_plot(FeatureImportanceType.SHAP, model_4)
21-Oct-21 16:33:23 - Generating a feature importance plot using SHAP for Model 4 ... 21-Oct-21 16:33:23 - Initializing Shap - calculating shap values. This operation is time-consuming so please be patient.
print(generate_feature_importance_explanation(FeatureImportanceType.SHAP, models, 4))
22-Oct-21 13:50:01 - Generating feature importance explanation for SHAP ...
Summary: The most influential feature for Model 1 is capital-gain with weight ~0.16. The 2nd most valuable feature for Model 1 is marital-status_ Married-civ-spouse with weight ~0.149. The 3rd most influential feature for Model 1 is gender_ Female with weight ~0.133. The 4th highest feature for Model 1 is marital-status_ Never-married with weight ~0.125. The best feature for Model 2 is marital-status_ Married-civ-spouse with weight ~0.397, identical to 2nd for Model 1. The 2nd most valuable feature for Model 2 is age with weight ~0.135. The 3rd most influential feature for Model 2 is education-num with weight ~0.117. The 4th highest feature for Model 2 is hours-per-week with weight ~0.101. The most important feature for Model 3 is marital-status_ Married-civ-spouse with weight ~0.159, similar to 2nd for Model 1. The 2nd highest feature for Model 3 is relationship_ Husband with weight ~0.104. The 3rd most influential feature for Model 3 is age with weight ~0.083, same as 2nd for Model 2. The 4th most valuable feature for Model 3 is education-num with weight ~0.07, matching 3rd for Model 2. The highest feature for Model 4 is capital-gain with weight ~0.141, same as 1st for Model 1. The 2nd most valuable feature for Model 4 is capital-loss with weight ~0.09. The 3rd best feature for Model 4 is age with weight ~0.003, similar to 2nd for Model 2. The 4th most influential feature for Model 4 is hours-per-week with weight ~0.002, similar to 4th for Model 2.
The partial dependence plot (short PDP or PD plot) shows the marginal effect one or two features have on the predicted outcome of a machine learning model. A partial dependence plot can show whether the relationship between the target and a feature is linear, monotonic or more complex. For example, when applied to a linear regression model, partial dependence plots always show a linear relationship.
PDPBox is the first module we use for plotting partial dependence. We will generate two plots: one for a single feature (age) and one for a pair of features (age and education-num).
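For comparison, scikit-learn offers the same kind of plot out of the box; a sketch, assuming a fitted classifier model and the encoded frame X_enc (hypothetical names, not this notebook's wrapper):
import matplotlib.pyplot as plt
from sklearn.inspection import PartialDependenceDisplay
# One single-feature PDP and one two-feature interaction PDP, mirroring the cells below.
PartialDependenceDisplay.from_estimator(
    model, X_enc,
    features=["age", ("age", "education-num")],
    kind="average",  # two-way PDPs only support the averaged view
)
plt.show()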
generate_pdp_plots(PDPType.PDPBox, model_1, "age", "None")
generate_pdp_plots(PDPType.PDPBox, model_1, "age", "education-num")
22-Oct-21 13:50:01 - Generating a PDP plot using PDPBox for Model 1 ... 22-Oct-21 13:50:12 - Generating a PDP plot using PDPBox for Model 1 ...
generate_pdp_plots(PDPType.PDPBox, model_2, "age", "None")
generate_pdp_plots(PDPType.PDPBox, model_2, "age", "education-num")
22-Oct-21 13:50:23 - Generating a PDP plot using PDPBox for Model 2 ... 22-Oct-21 13:50:34 - Generating a PDP plot using PDPBox for Model 2 ...
generate_pdp_plots(PDPType.PDPBox, model_3, "age", "None")
generate_pdp_plots(PDPType.PDPBox, model_3, "age", "education-num")
22-Oct-21 13:50:45 - Generating a PDP plot using PDPBox for Model 3 ... 22-Oct-21 13:50:58 - Generating a PDP plot using PDPBox for Model 3 ...
generate_pdp_plots(PDPType.PDPBox, model_4, "age", "None")
generate_pdp_plots(PDPType.PDPBox, model_4, "age", "education-num")
22-Oct-21 13:51:21 - Generating a PDP plot using PDPBox for Model 4 ... 22-Oct-21 13:57:19 - Generating a PDP plot using PDPBox for Model 4 ...
In the two examples below we use Skater and SHAP to generate PDPs for the features age and education-num.
generate_pdp_plots(PDPType.SKATER, model_1, "age", "education-num")
22-Oct-21 14:48:13 - Generating a PDP plot using SKATER for Model 1 ... 2021-10-22 14:48:21,734 - skater.core.explanations - WARNING - Progress bars slow down runs by 10-20%. For slightly faster runs, do progressbar=False
[1152/1152] grid cells ████████████████████ Time elapsed: 338 seconds
generate_pdp_plots(PDPType.SKATER, model_2, "age", "education-num")
22-Oct-21 14:54:01 - Generating a PDP plot using SKATER for Model 2 ... 2021-10-22 14:54:10,127 - skater.core.explanations - WARNING - Progress bars slow down runs by 10-20%. For slightly faster runs, do progressbar=False
[1152/1152] grid cells ████████████████████ Time elapsed: 295 seconds
generate_pdp_plots(PDPType.SKATER, model_3, "age", "education-num")
22-Oct-21 14:59:07 - Generating a PDP plot using SKATER for Model 3 ... 2021-10-22 14:59:15,757 - skater.core.explanations - WARNING - Progress bars slow down runs by 10-20%. For slightly faster runs, do progressbar=False
[1152/1152] grid cells ████████████████████ Time elapsed: 480 seconds
generate_pdp_plots(PDPType.SKATER, model_4, "age", "education-num")
22-Oct-21 15:07:17 - Generating a PDP plot using SKATER for Model 4 ... 2021-10-22 15:07:25,926 - skater.core.explanations - WARNING - Progress bars slow down runs by 10-20%. For slightly faster runs, do progressbar=False
[1152/1152] grid cells ████████████████████ Time elapsed: 1518 seconds
generate_pdp_plots(PDPType.SHAP, model_1, "age", "education-num")
22-Oct-21 15:32:45 - Generating a PDP plot using SHAP for Model 1 ...
generate_pdp_plots(PDPType.SHAP, model_2, "age", "education-num")
22-Oct-21 15:32:46 - Generating a PDP plot using SHAP for Model 2 ...
generate_pdp_plots(PDPType.SHAP, model_3, "age", "education-num")
22-Oct-21 15:32:46 - Generating a PDP plot using SHAP for Model 3 ...
generate_pdp_plots(PDPType.SHAP, model_4, "age", "education-num")
22-Oct-21 15:32:47 - Generating a PDP plot using SHAP for Model 4 ...
Local interpretation focuses on the specifics of each individual and provides explanations that can lead to a better understanding of feature contributions in smaller groups of individuals, which are often overlooked by global interpretation techniques. We will use two modules for interpreting single instances: SHAP and LIME.
SHAP leverages the idea of Shapley values for model feature influence scoring. The technical definition of a Shapley value is the “average marginal contribution of a feature value over all possible coalitions.” In other words, Shapley values consider all possible predictions for an instance using all possible combinations of inputs. Because of this exhaustive approach, SHAP can guarantee properties like consistency and local accuracy. LIME, on the other hand, does not offer such guarantees.
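The definition can be made concrete with a toy value function over two features; the numbers below are invented purely for illustration:
from itertools import permutations
# v(S): expected model output when only the features in S are known (toy values).
v = {frozenset(): 0.2,
     frozenset({'age'}): 0.5,
     frozenset({'hours'}): 0.3,
     frozenset({'age', 'hours'}): 0.7}
def shapley(feature, features):
    # Average the feature's marginal contribution over every ordering of the features.
    orders = list(permutations(features))
    total = 0.0
    for order in orders:
        before = frozenset(order[:order.index(feature)])
        total += v[before | {feature}] - v[before]
    return total / len(orders)
for f in ('age', 'hours'):
    print(f, shapley(f, ('age', 'hours')))  # 0.35 and 0.15; they sum to v(all) - v(empty) = 0.5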
LIME (Local Interpretable Model-agnostic Explanations) builds sparse linear models around each prediction to explain how the black-box model works in that local vicinity. While treating the model as a black box, we perturb the instance we want to explain and learn a sparse linear model around it as the explanation. LIME's advantage over SHAP is that it is much faster.
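For reference, a minimal sketch of using the lime package directly, assuming a fitted classifier clf with predict_proba, the one-hot-encoded training matrix X_train, and one encoded test row x_row (hypothetical names standing in for this notebook's helpers):
from lime.lime_tabular import LimeTabularExplainer
explainer = LimeTabularExplainer(
    X_train.values,
    feature_names=list(X_train.columns),
    class_names=["<=50K", ">50K"],
    discretize_continuous=True,
)
exp = explainer.explain_instance(x_row, clf.predict_proba, num_features=5)
print(exp.as_list())  # (feature condition, local weight) pairs for this instance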
examples = []
example_types = [ExampleType.FALSELY_CLASSIFIED]
for example_type in example_types:
    for model in models:
        example = get_test_examples(model, example_type, 1)[0]
        while example in examples:
            example = get_test_examples(model, example_type, 1)[0]
        examples.append(example)
display(examples)
[5225, 5371, 4944, 3906]
example = examples[0]
print(get_example_information(model_1, example))
print(generate_single_instance_comparison(models, example))
Example 5225's data:
age                            28
workclass                 Private
education               Bachelors
education-num                  13
marital-status      Never-married
occupation           Craft-repair
relationship            Own-child
ethnicity                   White
gender                       Male
capital-gain                    0
capital-loss                    0
hours-per-week                 40
Name: 22995, dtype: object
Actual result for example 5225: >50K
Example 5225 was correctly classified by no model and misclassified by Model 1, Model 2, Model 3, Model 4. For further clarification see the explanations below.
explanation = explain_single_instance(LocalInterpreterType.LIME, model_1, example)
print(generate_single_instance_explanation(LocalInterpreterType.LIME, model_1, example))
explanation.show_in_notebook(show_table=True, show_all=True)
explanation = explain_single_instance(LocalInterpreterType.SHAP, model_1, example)
print(generate_single_instance_explanation(LocalInterpreterType.SHAP, model_1, example))
display(explanation)
22-Oct-21 15:33:23 - Generating a single instance explanation using LIME for Model 1 ... 22-Oct-21 15:33:23 - Initializing LIME - generating new explainer. This operation may be time-consuming so please be patient. 22-Oct-21 15:33:28 - Generating a single instance explanation using LIME for Model 1 ...
The prediction probability of Model 1's decision for this example is 0.93. LIME's explanation: The feature that largely impacts Model 1's positive (1) prediction probability is gender= Male with value of 0.0922. The feature that mostly impacts Model 1's negative (0) prediction probability is capital-gain <= 0.00 with value of -0.5801. The feature with the second most substantial effect on Model 1's negative (0) prediction probability is marital-status= Never-married with value of -0.2403.
22-Oct-21 15:33:32 - Generating a single instance explanation using SHAP for Model 1 ...
The prediction probability of Model 1's decision for this example is 0.93. SHAP's explanation: The feature that mostly influences Model 1's positive (1) prediction probability is marital-status_ Never-married with value of 0.2165. The feature with the second most substantial impact on Model 1's positive (1) prediction probability is relationship_ Own-child with value of 0.2156. The third most impactful feature for the positive (1) prediction probability of Model 1 is capital-gain with value of 0.0614. The feature that primarily impacts Model 1's negative (0) prediction probability is education-num with value of -0.0629. The feature with the second most substantial influence on Model 1's negative (0) prediction probability is education_ Bachelors with value of -0.0242.
explanation = explain_single_instance(LocalInterpreterType.LIME, model_2, example)
print(generate_single_instance_explanation(LocalInterpreterType.LIME, model_2, example))
explanation.show_in_notebook(show_table=True, show_all=True)
explanation = explain_single_instance(LocalInterpreterType.SHAP, model_2, example)
print(generate_single_instance_explanation(LocalInterpreterType.SHAP, model_2, example))
display(explanation)
22-Oct-21 15:33:32 - Generating a single instance explanation using LIME for Model 2 ... 22-Oct-21 15:33:32 - Initializing LIME - generating new explainer. This operation may be time-consuming so please be patient. 22-Oct-21 15:33:37 - Generating a single instance explanation using LIME for Model 2 ...
The prediction probability of Model 2's decision for this example is 1.0. LIME's explanation: The feature that mostly impacts Model 2's positive (1) prediction probability is 10.00 < education-num <= 13.00 with value of 0.0609. The feature that mostly influences Model 2's negative (0) prediction probability is capital-gain <= 0.00 with value of -0.6627. The feature with the second most substantial impact on Model 2's negative (0) prediction probability is marital-status= Never-married with value of -0.1162.
22-Oct-21 15:33:41 - Generating a single instance explanation using SHAP for Model 2 ...
The prediction probability of Model 2's decision for this example is 1.0. SHAP's explanation:
explanation = explain_single_instance(LocalInterpreterType.LIME, model_3, example)
print(generate_single_instance_explanation(LocalInterpreterType.LIME, model_3, example))
explanation.show_in_notebook(show_table=True, show_all=True)
explanation = explain_single_instance(LocalInterpreterType.SHAP, model_3, example)
print(generate_single_instance_explanation(LocalInterpreterType.SHAP, model_3, example))
display(explanation)
22-Oct-21 15:33:41 - Generating a single instance explanation using LIME for Model 3 ... 22-Oct-21 15:33:41 - Initializing LIME - generating new explainer. This operation may be time-consuming so please be patient. 22-Oct-21 15:33:46 - Generating a single instance explanation using LIME for Model 3 ...
The prediction probability of Model 3's decision for this example is 1.0. LIME's explanation: The feature that primarily impacts Model 3's negative (0) prediction probability is capital-gain <= 0.00 with value of -0.4403. The feature with the second largest influence on Model 3's negative (0) prediction probability is marital-status= Never-married with value of -0.1013.
22-Oct-21 15:33:51 - Generating a single instance explanation using SHAP for Model 3 ...
The prediction probability of Model 3's decision for this example is 1.0. SHAP's explanation: The feature that primarily impacts Model 3's positive (1) prediction probability is marital-status_ Never-married with value of 0.0861. The feature with the second most substantial impact on Model 3's positive (1) prediction probability is age with value of 0.0284. The third most influential feature for the positive (1) prediction probability of Model 3 is relationship_ Own-child with value of 0.0207. The feature that largely influences Model 3's negative (0) prediction probability is education-num with value of -0.0194. The feature with the second most considerable impact on Model 3's negative (0) prediction probability is education_ Bachelors with value of -0.0097.
explanation = explain_single_instance(LocalInterpreterType.LIME, model_4, example)
print(generate_single_instance_explanation(LocalInterpreterType.LIME, model_4, example))
explanation.show_in_notebook(show_table=True, show_all=True)
explanation = explain_single_instance(LocalInterpreterType.SHAP, model_4, example)
print(generate_single_instance_explanation(LocalInterpreterType.SHAP, model_4, example))
display(explanation)
22-Oct-21 15:33:52 - Generating a single instance explanation using LIME for Model 4 ... 22-Oct-21 15:33:52 - Initializing LIME - generating new explainer. This operation may be time-consuming so please be patient. 22-Oct-21 15:34:15 - Generating a single instance explanation using LIME for Model 4 ...
The prediction probability of Model 4's decision for this example is 0.81. LIME's explanation: The feature that mainly impacts Model 4's negative (0) prediction probability is capital-loss <= 0.00 with value of -0.4309. The feature with the second most substantial impact on Model 4's negative (0) prediction probability is capital-gain <= 0.00 with value of -0.3617.
22-Oct-21 15:35:14 - Generating a single instance explanation using SHAP for Model 4 ...
The prediction probability of Model 4's decision for this example is 0.81. SHAP's explanation: The feature that mainly changes Model 4's positive (1) prediction probability is capital-loss with value of 0.0262. The feature with the second biggest effect on Model 4's positive (1) prediction probability is age with value of 0.0014. The third most impactful feature for the positive (1) prediction probability of Model 4 is hours-per-week with value of 0.0001. The feature that mostly influences Model 4's negative (0) prediction probability is capital-gain with value of -0.0356. The feature with the second most substantial effect on Model 4's negative (0) prediction probability is education-num with value of -0.0002.
example = examples[1]
print(get_example_information(model_1, example))
print(generate_single_instance_comparison(models, example))
Example 5371's data:
age                                43
workclass                     Private
education                Some-college
education-num                      10
marital-status     Married-civ-spouse
occupation          Machine-op-inspct
relationship                  Husband
ethnicity                       Black
gender                           Male
capital-gain                        0
capital-loss                        0
hours-per-week                     50
Name: 9517, dtype: object
Actual result for example 5371: <=50K
Example 5371 was correctly classified by Model 3, Model 4 and misclassified by Model 1, Model 2. For further clarification see the explanations below.
explanation = explain_single_instance(LocalInterpreterType.LIME, model_1, example)
print(generate_single_instance_explanation(LocalInterpreterType.LIME, model_1, example))
explanation.show_in_notebook(show_table=True, show_all=True)
explanation = explain_single_instance(LocalInterpreterType.SHAP, model_1, example)
print(generate_single_instance_explanation(LocalInterpreterType.SHAP, model_1, example))
display(explanation)
22-Oct-21 15:36:24 - Generating a single instance explanation using LIME for Model 1 ... 22-Oct-21 15:36:28 - Generating a single instance explanation using LIME for Model 1 ...
The prediction probability of Model 1's decision for this example is 0.5. LIME's explanation: The feature that mostly influences Model 1's positive (1) prediction probability is marital-status= Married-civ-spouse with value of 0.2699. The feature with the second most substantial effect on Model 1's positive (1) prediction probability is hours-per-week > 45.00 with value of 0.1139. The third most impactful feature for the positive (1) prediction probability of Model 1 is gender= Male with value of 0.088. The feature that largely impacts Model 1's negative (0) prediction probability is capital-gain <= 0.00 with value of -0.5755. The feature with the second most considerable change on Model 1's negative (0) prediction probability is capital-loss <= 0.00 with value of -0.1873.
22-Oct-21 15:36:33 - Generating a single instance explanation using SHAP for Model 1 ...
The prediction probability of Model 1's decision for this example is 0.5. SHAP's explanation: The feature that mainly affects Model 1's positive (1) prediction probability is marital-status_ Married-civ-spouse with value of 0.1828. The feature with the second most substantial impact on Model 1's positive (1) prediction probability is ethnicity_ White with value of 0.1191. The third most impactful feature for the positive (1) prediction probability of Model 1 is hours-per-week with value of 0.0585. The feature that largely impacts Model 1's negative (0) prediction probability is ethnicity_ Black with value of -0.1446. The feature with the second most substantial change on Model 1's negative (0) prediction probability is occupation_ Machine-op-inspct with value of -0.124.
explanation = explain_single_instance(LocalInterpreterType.LIME, model_2, example)
print(generate_single_instance_explanation(LocalInterpreterType.LIME, model_2, example))
explanation.show_in_notebook(show_table=True, show_all=True)
explanation = explain_single_instance(LocalInterpreterType.SHAP, model_2, example)
print(generate_single_instance_explanation(LocalInterpreterType.SHAP, model_2, example))
display(explanation)
22-Oct-21 15:36:33 - Generating a single instance explanation using LIME for Model 2 ... 22-Oct-21 15:36:37 - Generating a single instance explanation using LIME for Model 2 ...
The prediction probability of Model 2's decision for this example is 1.0. LIME's explanation: The feature that primarily impacts Model 2's positive (1) prediction probability is marital-status= Married-civ-spouse with value of 0.1259. The feature with the second largest change on Model 2's positive (1) prediction probability is hours-per-week > 45.00 with value of 0.1083. The third most effective feature for the positive (1) prediction probability of Model 2 is 37.00 < age <= 47.00 with value of 0.0737. The feature that mainly affects Model 2's negative (0) prediction probability is capital-gain <= 0.00 with value of -0.6763. The feature with the second most considerable impact on Model 2's negative (0) prediction probability is occupation= Machine-op-inspct with value of -0.0631.
22-Oct-21 15:36:42 - Generating a single instance explanation using SHAP for Model 2 ...
The prediction probability of Model 2's decision for this example is 1.0. SHAP's explanation: The feature that primarily changes Model 2's positive (1) prediction probability is marital-status_ Married-civ-spouse with value of 0.3854. The feature with the second largest effect on Model 2's positive (1) prediction probability is ethnicity_ White with value of 0.3133. The third most influential feature for the positive (1) prediction probability of Model 2 is hours-per-week with value of 0.1436. The feature that largely changes Model 2's negative (0) prediction probability is ethnicity_ Black with value of -0.0238. The feature with the second most considerable impact on Model 2's negative (0) prediction probability is age with value of -0.023.
explanation = explain_single_instance(LocalInterpreterType.LIME, model_3, example)
print(generate_single_instance_explanation(LocalInterpreterType.LIME, model_3, example))
explanation.show_in_notebook(show_table=True, show_all=True)
explanation = explain_single_instance(LocalInterpreterType.SHAP, model_3, example)
print(generate_single_instance_explanation(LocalInterpreterType.SHAP, model_3, example))
display(explanation)
22-Oct-21 15:36:42 - Generating a single instance explanation using LIME for Model 3 ... 22-Oct-21 15:36:47 - Generating a single instance explanation using LIME for Model 3 ...
The prediction probability of Model 3's decision for this example is 0.63. LIME's explanation: The feature that largely impacts Model 3's positive (1) prediction probability is marital-status= Married-civ-spouse with value of 0.1097. The feature with the second biggest change on Model 3's positive (1) prediction probability is hours-per-week > 45.00 with value of 0.0781. The third most impactful feature for the positive (1) prediction probability of Model 3 is relationship= Husband with value of 0.0706. The feature that mostly influences Model 3's negative (0) prediction probability is capital-gain <= 0.00 with value of -0.4358.
22-Oct-21 15:36:51 - Generating a single instance explanation using SHAP for Model 3 ...
The prediction probability of Model 3's decision for this example is 0.63. SHAP's explanation: The feature that primarily changes Model 3's positive (1) prediction probability is occupation_ Machine-op-inspct with value of 0.0633. The feature with the second largest influence on Model 3's positive (1) prediction probability is ethnicity_ Black with value of 0.0605. The third most effective feature for the positive (1) prediction probability of Model 3 is age with value of 0.0454. The feature that mostly affects Model 3's negative (0) prediction probability is marital-status_ Married-civ-spouse with value of -0.1893. The feature with the second biggest effect on Model 3's negative (0) prediction probability is relationship_ Husband with value of -0.1572.
explanation = explain_single_instance(LocalInterpreterType.LIME, model_4, example)
print(generate_single_instance_explanation(LocalInterpreterType.LIME, model_4, example))
explanation.show_in_notebook(show_table=True, show_all=True)
explanation = explain_single_instance(LocalInterpreterType.SHAP, model_4, example)
print(generate_single_instance_explanation(LocalInterpreterType.SHAP, model_4, example))
display(explanation)
22-Oct-21 15:36:52 - Generating a single instance explanation using LIME for Model 4 ... 22-Oct-21 15:37:15 - Generating a single instance explanation using LIME for Model 4 ...
The prediction probability of Model 4's decision for this example is 0.8. LIME's explanation: The feature that primarily affects Model 4's positive (1) prediction probability is hours-per-week > 45.00 with value of 0.0073. The feature that mainly affects Model 4's negative (0) prediction probability is capital-loss <= 0.00 with value of -0.4155. The feature with the second biggest impact on Model 4's negative (0) prediction probability is capital-gain <= 0.00 with value of -0.3734.
22-Oct-21 15:38:14 - Generating a single instance explanation using SHAP for Model 4 ...
The prediction probability of Model 4's decision for this example is 0.8. SHAP's explanation: The feature that primarily impacts Model 4's positive (1) prediction probability is capital-loss with value of 0.0263. The feature with the second most considerable effect on Model 4's positive (1) prediction probability is ethnicity_ White with value of 0.0. The third most impactful feature for the positive (1) prediction probability of Model 4 is ethnicity_ Black with value of 0.0. The feature that primarily impacts Model 4's negative (0) prediction probability is capital-gain with value of -0.0359. The feature with the second biggest effect on Model 4's negative (0) prediction probability is hours-per-week with value of -0.0012.
example = examples[2]
print(get_example_information(model_1, example))
print(generate_single_instance_comparison(models, example))
Example 4944's data:
age                                44
workclass                     Private
education                Some-college
education-num                      10
marital-status     Married-civ-spouse
occupation          Handlers-cleaners
relationship                  Husband
ethnicity                       White
gender                           Male
capital-gain                        0
capital-loss                        0
hours-per-week                     40
Name: 3993, dtype: object
Actual result for example 4944: <=50K
Example 4944 was correctly classified by Model 1, Model 4 and misclassified by Model 2, Model 3. For further clarification see the explanations below.
explanation = explain_single_instance(LocalInterpreterType.LIME, model_1, example)
print(generate_single_instance_explanation(LocalInterpreterType.LIME, model_1, example))
explanation.show_in_notebook(show_table=True, show_all=True)
explanation = explain_single_instance(LocalInterpreterType.SHAP, model_1, example)
print(generate_single_instance_explanation(LocalInterpreterType.SHAP, model_1, example))
display(explanation)
22-Oct-21 15:39:24 - Generating a single instance explanation using LIME for Model 1 ... 22-Oct-21 15:39:28 - Generating a single instance explanation using LIME for Model 1 ...
The prediction probability of Model 1's decision for this example is 0.54. LIME's explanation: The feature that primarily impacts Model 1's positive (1) prediction probability is marital-status= Married-civ-spouse with value of 0.2698. The feature with the second most considerable impact on Model 1's positive (1) prediction probability is gender= Male with value of 0.0886. The feature that largely affects Model 1's negative (0) prediction probability is capital-gain <= 0.00 with value of -0.5832. The feature with the second most substantial impact on Model 1's negative (0) prediction probability is capital-loss <= 0.00 with value of -0.1703.
22-Oct-21 15:39:33 - Generating a single instance explanation using SHAP for Model 1 ...
The prediction probability of Model 1's decision for this example is 0.54. SHAP's explanation: The feature that mainly affects Model 1's positive (1) prediction probability is occupation_ Handlers-cleaners with value of 0.1329. The feature with the second largest change on Model 1's positive (1) prediction probability is capital-gain with value of 0.0864. The third most influential feature for the positive (1) prediction probability of Model 1 is education_ Some-college with value of 0.044. The feature that largely changes Model 1's negative (0) prediction probability is marital-status_ Married-civ-spouse with value of -0.1865. The feature with the second most substantial effect on Model 1's negative (0) prediction probability is age with value of -0.0232.
explanation = explain_single_instance(LocalInterpreterType.LIME, model_2, example)
print(generate_single_instance_explanation(LocalInterpreterType.LIME, model_2, example))
explanation.show_in_notebook(show_table=True, show_all=True)
explanation = explain_single_instance(LocalInterpreterType.SHAP, model_2, example)
print(generate_single_instance_explanation(LocalInterpreterType.SHAP, model_2, example))
display(explanation)
22-Oct-21 15:39:33 - Generating a single instance explanation using LIME for Model 2 ... 22-Oct-21 15:39:38 - Generating a single instance explanation using LIME for Model 2 ...
The prediction probability of Model 2's decision for this example is 1.0. LIME's explanation: The feature that mostly impacts Model 2's positive (1) prediction probability is marital-status= Married-civ-spouse with value of 0.1633. The feature with the second most considerable change on Model 2's positive (1) prediction probability is 37.00 < age <= 47.00 with value of 0.0353. The feature that mostly changes Model 2's negative (0) prediction probability is capital-gain <= 0.00 with value of -0.6822. The feature with the second most considerable impact on Model 2's negative (0) prediction probability is capital-loss <= 0.00 with value of -0.119.
22-Oct-21 15:39:42 - Generating a single instance explanation using SHAP for Model 2 ...
The prediction probability of Model 2's decision for this example is 1.0. SHAP's explanation: The feature that mainly affects Model 2's positive (1) prediction probability is marital-status_ Married-civ-spouse with value of 0.6209. The feature with the second most considerable influence on Model 2's positive (1) prediction probability is age with value of 0.3084. The third most effective feature for the positive (1) prediction probability of Model 2 is occupation_ Handlers-cleaners with value of 0.058. The feature that largely influences Model 2's negative (0) prediction probability is relationship_ Husband with value of -0.025.
explanation = explain_single_instance(LocalInterpreterType.LIME, model_3, example)
print(generate_single_instance_explanation(LocalInterpreterType.LIME, model_3, example))
explanation.show_in_notebook(show_table=True, show_all=True)
explanation = explain_single_instance(LocalInterpreterType.SHAP, model_3, example)
print(generate_single_instance_explanation(LocalInterpreterType.SHAP, model_3, example))
display(explanation)
22-Oct-21 15:39:42 - Generating a single instance explanation using LIME for Model 3 ... 22-Oct-21 15:39:47 - Generating a single instance explanation using LIME for Model 3 ...
The prediction probability of Model 3's decision for this example is 0.85. LIME's explanation: The feature that mainly impacts Model 3's positive (1) prediction probability is marital-status= Married-civ-spouse with value of 0.1149. The feature with the second most substantial change on Model 3's positive (1) prediction probability is relationship= Husband with value of 0.0759. The third most effective feature for the positive (1) prediction probability of Model 3 is 37.00 < age <= 47.00 with value of 0.0451. The feature that primarily impacts Model 3's negative (0) prediction probability is capital-gain <= 0.00 with value of -0.4472. The feature with the second most considerable impact on Model 3's negative (0) prediction probability is hours-per-week <= 40.00 with value of -0.0753.
22-Oct-21 15:39:52 - Generating a single instance explanation using SHAP for Model 3 ...
The prediction probability of Model 3's decision for this example is 0.85. SHAP's explanation: The feature that primarily affects Model 3's positive (1) prediction probability is marital-status_ Married-civ-spouse with value of 0.2174. The feature with the second most substantial influence on Model 3's positive (1) prediction probability is relationship_ Husband with value of 0.1749. The third most important feature for the positive (1) prediction probability of Model 3 is education_ Some-college with value of 0.1471.
explanation = explain_single_instance(LocalInterpreterType.LIME, model_4, example)
print(generate_single_instance_explanation(LocalInterpreterType.LIME, model_4, example))
explanation.show_in_notebook(show_table=True, show_all=True)
explanation = explain_single_instance(LocalInterpreterType.SHAP, model_4, example)
print(generate_single_instance_explanation(LocalInterpreterType.SHAP, model_4, example))
display(explanation)
22-Oct-21 15:39:52 - Generating a single instance explanation using LIME for Model 4 ... 22-Oct-21 15:40:16 - Generating a single instance explanation using LIME for Model 4 ...
The prediction probability of Model 4's decision for this example is 0.81. LIME's explanation: The feature that mostly influences Model 4's positive (1) prediction probability is education= Some-college with value of 0.0031. The feature that mostly changes Model 4's negative (0) prediction probability is capital-loss <= 0.00 with value of -0.4347. The feature with the second most considerable impact on Model 4's negative (0) prediction probability is capital-gain <= 0.00 with value of -0.3784.
22-Oct-21 15:41:14 - Generating a single instance explanation using SHAP for Model 4 ...
The prediction probability of Model 4's decision for this example is 0.81. SHAP's explanation: The feature that mostly influences Model 4's positive (1) prediction probability is capital-loss with value of 0.0263. The feature with the second most substantial influence on Model 4's positive (1) prediction probability is hours-per-week with value of 0.0001. The third most impactful feature for the positive (1) prediction probability of Model 4 is occupation_ Handlers-cleaners with value of 0.0. The feature that mostly influences Model 4's negative (0) prediction probability is capital-gain with value of -0.0358. The feature with the second most substantial change on Model 4's negative (0) prediction probability is age with value of -0.0006.
example = examples[3]
print(get_example_information(model_1, example))
print(generate_single_instance_comparison(models, example))
Example 3906's data:
age                                47
workclass                 Federal-gov
education                   Bachelors
education-num                      13
marital-status     Married-civ-spouse
occupation             Prof-specialty
relationship                  Husband
ethnicity                       White
gender                           Male
capital-gain                        0
capital-loss                        0
hours-per-week                     40
Name: 21678, dtype: object
Actual result for example 3906: >50K
Example 3906 was correctly classified by Model 1, Model 2, Model 3 and misclassified by Model 4. For further clarification see the explanations below.
explanation = explain_single_instance(LocalInterpreterType.LIME, model_1, example)
print(generate_single_instance_explanation(LocalInterpreterType.LIME, model_1, example))
explanation.show_in_notebook(show_table=True, show_all=True)
explanation = explain_single_instance(LocalInterpreterType.SHAP, model_1, example)
print(generate_single_instance_explanation(LocalInterpreterType.SHAP, model_1, example))
display(explanation)
22-Oct-21 15:42:25 - Generating a single instance explanation using LIME for Model 1 ... 22-Oct-21 15:42:29 - Generating a single instance explanation using LIME for Model 1 ...
The prediction probability of Model 1's decision for this example is 0.9. LIME's explanation: The feature that mainly affects Model 1's positive (1) prediction probability is marital-status= Married-civ-spouse with value of 0.274. The feature with the second most substantial influence on Model 1's positive (1) prediction probability is gender= Male with value of 0.1063. The feature that largely changes Model 1's negative (0) prediction probability is capital-gain <= 0.00 with value of -0.5851. The feature with the second most substantial effect on Model 1's negative (0) prediction probability is capital-loss <= 0.00 with value of -0.2278.
22-Oct-21 15:42:34 - Generating a single instance explanation using SHAP for Model 1 ...
The prediction probability of Model 1's decision for this example is 0.9. SHAP's explanation: The feature that primarily impacts Model 1's positive (1) prediction probability is marital-status_ Married-civ-spouse with value of 0.1362. The feature with the second biggest change on Model 1's positive (1) prediction probability is occupation_ Prof-specialty with value of 0.0764. The third most effective feature for the positive (1) prediction probability of Model 1 is workclass_ Private with value of 0.074. The feature that largely influences Model 1's negative (0) prediction probability is capital-gain with value of -0.0607. The feature with the second largest influence on Model 1's negative (0) prediction probability is capital-loss with value of -0.0159.
explanation = explain_single_instance(LocalInterpreterType.LIME, model_2, example)
print(generate_single_instance_explanation(LocalInterpreterType.LIME, model_2, example))
explanation.show_in_notebook(show_table=True, show_all=True)
explanation = explain_single_instance(LocalInterpreterType.SHAP, model_2, example)
print(generate_single_instance_explanation(LocalInterpreterType.SHAP, model_2, example))
display(explanation)
22-Oct-21 15:42:34 - Generating a single instance explanation using LIME for Model 2 ... 22-Oct-21 15:42:38 - Generating a single instance explanation using LIME for Model 2 ...
The prediction probability of Model 2's decision for this example is 1.0. LIME's explanation: The feature that mainly affects Model 2's positive (1) prediction probability is marital-status= Married-civ-spouse with value of 0.1506. The feature with the second biggest impact on Model 2's positive (1) prediction probability is 10.00 < education-num <= 13.00 with value of 0.0579. The third most important feature for the positive (1) prediction probability of Model 2 is 37.00 < age <= 47.00 with value of 0.0422. The feature that primarily impacts Model 2's negative (0) prediction probability is capital-gain <= 0.00 with value of -0.6817. The feature with the second most considerable impact on Model 2's negative (0) prediction probability is hours-per-week <= 40.00 with value of -0.1062.
22-Oct-21 15:42:43 - Generating a single instance explanation using SHAP for Model 2 ...
The prediction probability of Model 2's decision for this example is 1.0. SHAP's explanation: The feature that primarily affects Model 2's positive (1) prediction probability is marital-status_ Married-civ-spouse with value of 0.715. The feature with the second biggest effect on Model 2's positive (1) prediction probability is occupation_ Prof-specialty with value of 0.3358. The third most effective feature for the positive (1) prediction probability of Model 2 is workclass_ Federal-gov with value of 0.1762. The feature that mainly affects Model 2's negative (0) prediction probability is education-num with value of -0.2016. The feature with the second biggest change on Model 2's negative (0) prediction probability is education_ Bachelors with value of -0.0308.
explanation = explain_single_instance(LocalInterpreterType.LIME, model_3, example)
print(generate_single_instance_explanation(LocalInterpreterType.LIME, model_3, example))
explanation.show_in_notebook(show_table=True, show_all=True)
explanation = explain_single_instance(LocalInterpreterType.SHAP, model_3, example)
print(generate_single_instance_explanation(LocalInterpreterType.SHAP, model_3, example))
display(explanation)
22-Oct-21 15:42:43 - Generating a single instance explanation using LIME for Model 3 ... 22-Oct-21 15:42:47 - Generating a single instance explanation using LIME for Model 3 ...
The prediction probability of Model 3's decision for this example is 0.98. LIME's explanation: The feature that primarily influences Model 3's positive (1) prediction probability is marital-status= Married-civ-spouse with value of 0.1212. The feature with the second most substantial effect on Model 3's positive (1) prediction probability is relationship= Husband with value of 0.0731. The third most influential feature for the positive (1) prediction probability of Model 3 is occupation= Prof-specialty with value of 0.0711. The feature that mostly changes Model 3's negative (0) prediction probability is capital-gain <= 0.00 with value of -0.4472. The feature with the second most considerable influence on Model 3's negative (0) prediction probability is hours-per-week <= 40.00 with value of -0.0741.
22-Oct-21 15:42:52 - Generating a single instance explanation using SHAP for Model 3 ...
The prediction probability of Model 3's decision for this example is 0.98. SHAP's explanation: The feature that primarily affects Model 3's positive (1) prediction probability is relationship_ Husband with value of 0.2498. The feature with the second most considerable influence on Model 3's positive (1) prediction probability is marital-status_ Married-civ-spouse with value of 0.2157. The third most influential feature for the positive (1) prediction probability of Model 3 is occupation_ Prof-specialty with value of 0.1702. The feature that mainly affects Model 3's negative (0) prediction probability is workclass_ Private with value of -0.0055.
explanation = explain_single_instance(LocalInterpreterType.LIME, model_4, example)
print(generate_single_instance_explanation(LocalInterpreterType.LIME, model_4, example))
explanation.show_in_notebook(show_table=True, show_all=True)
explanation = explain_single_instance(LocalInterpreterType.SHAP, model_4, example)
print(generate_single_instance_explanation(LocalInterpreterType.SHAP, model_4, example))
display(explanation)
22-Oct-21 15:42:53 - Generating a single instance explanation using LIME for Model 4 ... 22-Oct-21 15:43:16 - Generating a single instance explanation using LIME for Model 4 ...
The prediction probability of Model 4's decision for this example is 0.81. LIME's explanation: The feature that mostly affects Model 4's positive (1) prediction probability is ethnicity= White with value of 0.0045. The feature with the second largest effect on Model 4's positive (1) prediction probability is gender= Male with value of 0.0037. The feature that largely affects Model 4's negative (0) prediction probability is capital-loss <= 0.00 with value of -0.4198. The feature with the second most considerable impact on Model 4's negative (0) prediction probability is capital-gain <= 0.00 with value of -0.3502.
22-Oct-21 15:44:15 - Generating a single instance explanation using SHAP for Model 4 ...
The prediction probability of Model 4's decision for this example is 0.81. SHAP's explanation: The feature that mostly changes Model 4's positive (1) prediction probability is capital-loss with value of 0.0263. The feature with the second biggest change on Model 4's positive (1) prediction probability is hours-per-week with value of 0.0001. The feature that mainly influences Model 4's negative (0) prediction probability is capital-gain with value of -0.0358. The feature with the second most considerable change on Model 4's negative (0) prediction probability is age with value of -0.001.